In [1]:
    
import pymysql
db = pymysql.connect(
    "db.fastcamp.us",
    "root",             
    "dkstncks",
    "sakila",
    charset='utf8',
)
rental_df = pd.read_sql("SELECT * FROM rental;", db)
    
In [7]:
    
rental_df = rental_df[["rental_id", "rental_date"]]
    
In [8]:
    
rental_df["month"] = rental_df["rental_date"].apply(lambda x: str(x)[:7])
rental_df.head()
    
    Out[8]:
In [9]:
    
month_groups = rental_df.groupby("month")
    
In [12]:
    
month_groups.get_group("2005-05")
    
    Out[12]:
In [10]:
    
rental_df["month"].unique()
    
    Out[10]:
In [16]:
    
len(month_groups.get_group("2005-05"))
    
    Out[16]:
In [13]:
    
pd.DataFrame([
        {
            "month": month,
            "rental count": len(month_groups.get_group(month)),
        }
        for month
        in rental_df["month"].unique()
    ])
    
    Out[13]:
In [18]:
    
rental_df.groupby("month")
    
    Out[18]:
In [17]:
    
rental_df.groupby("month").size()
    
    Out[17]:
In [19]:
    
rental_df.groupby("month").agg({"rental_id": np.size})
    
    Out[19]:
In [24]:
    
SQL_QUERY = """
    SELECT LEFT(rental_date, 7) "Month", COUNT(*) "Total Rental"
    FROM rental
    GROUP BY Month
    ;
"""
pd.read_sql(SQL_QUERY, db)
    
    Out[24]:
In [39]:
    
SQL_QUERY = """
    SELECT *
    FROM payment
    ;
"""
payment_df = pd.read_sql(SQL_QUERY, db)
    
In [40]:
    
payment_df = payment_df[["payment_date", "payment_id", "amount"]]
    
In [41]:
    
payment_df.head(1)
    
    Out[41]:
In [42]:
    
payment_df["month"] = payment_df["payment_date"].apply(lambda x: str(x)[:7])
    
In [46]:
    
payment_df.groupby("month").agg({"payment_id": np.size, "amount": [np.sum, np.mean]})
    
    Out[46]:
In [47]:
    
# 마지막으로 sql
    
In [54]:
    
SQL_QUERY = """
    SELECT
        LEFT(payment_date, 7) Month,
        count(*) "Total Payment",
        SUM(amount) "Total Revenue",
        AVG(amount) "Average Payment"
    FROM payment
    GROUP BY Month
    ;
"""
pd.read_sql(SQL_QUERY, db)
    
    Out[54]: